
import pandas as pd
import tensorflow as tf
from google.colab import drive
from collections import Counter
import math
import csv
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy.stats import norm
# Global figure styling: high-resolution output and a 12pt base font.
plt.rcParams.update({'figure.dpi': 300, 'font.size': 12})

# Load the feature table. The first line of the file is skipped and, with
# header=None, columns are addressed by integer position.
# read_csv already returns a DataFrame, so it is not wrapped again.
data = pd.read_csv('infos_feature_new.csv', skiprows=1, header=None)

# Column 14 is used as the x variable (labelled log(follower) in the plots),
# column 2 as the first y variable (labelled log(impr)).
t = data.iloc[:, 14]
y = data.iloc[:, 2]



# Average of `y` inside fixed-width bins of `t`, drawn as a line plot.
interval = 1.5  # bin width along the x axis

# Bin key of every row (floor of t / interval), computed once and reused for
# both the counts and the sums.
bin_keys = [value // interval for value in t]

# tn: number of rows per bin.
tn = Counter(bin_keys)

# cn: per-bin sum of y, turned into the per-bin mean right after.
# zip pairs each row's key with its y value, avoiding a per-row label lookup
# on the Series.
cn = dict.fromkeys(tn, 0)
for key, num in zip(bin_keys, y):
    cn[key] += num
cn = {key: total / tn[key] for key, total in cn.items()}

# Curve points ordered by bin; x is the lower edge of each bin.
ordered_keys = sorted(cn)
xplot = [key * interval for key in ordered_keys]
yplot = [cn[key] for key in ordered_keys]

plt.figure()
plt.plot(xplot, yplot, label="avg impr", color="#35426A")
plt.xlabel('log(follower)')
plt.ylabel('average log(impr)')
# Same tick grid on both axes (set once each).
plt.xticks(np.arange(0, 21, 2.5))
plt.yticks(np.arange(0, 21, 2.5))
plt.show()
# Raw scatter of `t` against `y`. The very low alpha makes dense regions show
# up as darker areas instead of a solid blob.
# A new figure is opened explicitly rather than relying on a preceding
# plt.show() having discarded the previous one.
plt.figure()
plt.scatter(t, y, color="steelblue", alpha=0.01)
plt.xlabel("log(Follower)")
plt.ylabel("log(Impression)")
plt.xlim([0, 20])
plt.show()

# Exponentiate columns 20 and 2 before taking their ratio.
# NOTE(review): this presumably undoes a natural-log transform on both
# columns -- confirm against how the CSV was produced.
share = np.exp(data.iloc[:, 20])
yh = np.exp(data.iloc[:, 2])  # impr

# Per-row ratio share / impr. Element-wise Series division replaces the
# per-index Python loop; the result is kept as a plain list.
yhh = (share / yh).tolist()
y = yhh

# Average of `y` (the shares/impression ratio at this point of the script)
# inside fixed-width bins of `t`, drawn as a line plot.
interval = 1.5  # bin width along the x axis

# Bin key of every row (floor of t / interval), computed once and reused for
# both the counts and the sums.
bin_keys = [value // interval for value in t]

# tn: number of rows per bin.
tn = Counter(bin_keys)

# cn: per-bin sum of y, turned into the per-bin mean right after.
# zip pairs each row's key with its y value, avoiding a per-row label lookup
# on the Series.
cn = dict.fromkeys(tn, 0)
for key, num in zip(bin_keys, y):
    cn[key] += num
cn = {key: total / tn[key] for key, total in cn.items()}

# Curve points ordered by bin; x is the lower edge of each bin.
ordered_keys = sorted(cn)
xplot = [key * interval for key in ordered_keys]
yplot = [cn[key] for key in ordered_keys]

plt.figure().set_figwidth(10)
# The plotted quantity is the mean shares/impression ratio, not a log of
# shares, so the labels name it accordingly.
plt.plot(xplot, yplot, label="avg shares/impr", color='#35426A')
plt.xlabel('log(follower)')
plt.ylabel('average shares/impression')
plt.xticks(np.arange(0, 17, 2.5))
plt.show()

# Dump the most recently computed binned curve: the list of bin positions on
# one line, then one bin average per line.
print(xplot)
for i in yplot:
    print(i)

# Raw scatter of `t` against `y`; low alpha so dense regions appear darker.
# A new figure is opened explicitly rather than relying on a preceding
# plt.show() having discarded the previous one.
plt.figure()
plt.scatter(t, y, color="steelblue", alpha=0.05)
plt.yticks(np.arange(0, 0.6, 0.1))
plt.xlabel("log(follower)")
plt.ylabel("shares/impression")
plt.xlim([0, 16])
plt.show()